/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.analysis.id;

import static org.apache.lucene.analysis.util.StemmerUtil.deleteN;
import static org.apache.lucene.analysis.util.StemmerUtil.endsWith;
import static org.apache.lucene.analysis.util.StemmerUtil.startsWith;

/**
 * Stemmer for Indonesian.
 *
 * <p>Stems Indonesian words with the algorithm presented in: <i>A Study of Stemming Effects on
 * Information Retrieval in Bahasa Indonesia</i>, Fadillah Z Tala.
 * http://www.illc.uva.nl/Publications/ResearchReports/MoL-2003-02.text.pdf
 *
 * <p>The per-word working state (syllable count and removed-prefix flags) is kept in instance
 * fields that every call to {@link #stem(char[], int, boolean)} resets and then mutates.
 */
class IndonesianStemmer {
  // One bit per prefix family; the bits are OR-ed into `flags` as prefixes are stripped and are
  // later consulted by removeSuffix to veto certain prefix/suffix combinations.
  private static final int REMOVED_KE = 1 << 0;
  private static final int REMOVED_PENG = 1 << 1;
  private static final int REMOVED_DI = 1 << 2;
  private static final int REMOVED_MENG = 1 << 3;
  private static final int REMOVED_TER = 1 << 4;
  private static final int REMOVED_BER = 1 << 5;
  private static final int REMOVED_PE = 1 << 6;

  /** Passed to {@link #stripPrefix} by rules that do not record anything in {@link #flags}. */
  private static final int NO_FLAG = 0;

  /** A removal rule is only attempted while the syllable count is strictly above this value. */
  private static final int SYLLABLE_FLOOR = 2;

  /** Number of vowels counted in the current word, minus one for every affix removed so far. */
  private int numSyllables;

  /** Union of the {@code REMOVED_*} bits for the prefixes removed from the current word. */
  private int flags;

  /**
   * Stem a term (returning its new length).
   *
   * <p>Use <code>stemDerivational</code> to control whether full stemming or only light
   * inflectional stemming is done. Particles and possessive pronouns are always considered;
   * prefixes and derivational suffixes only when <code>stemDerivational</code> is true.
   *
   * @param text buffer holding the term; prefix rules edit it in place
   * @param length number of leading chars of {@code text} that make up the term
   * @param stemDerivational whether to also run the derivational (prefix/suffix) rules
   * @return the length of the stemmed term within {@code text}
   */
  int stem(char[] text, int length, boolean stemDerivational) {
    flags = 0;
    numSyllables = countVowels(text, length);

    if (canStrip()) {
      length = removeParticle(text, length);
    }
    if (canStrip()) {
      length = removePossessivePronoun(text, length);
    }

    return stemDerivational ? stemDerivational(text, length) : length;
  }

  /**
   * Runs the derivational rules. The order depends on whether a first-order prefix was removed
   * (detected by comparing the length before and after the attempt).
   */
  private int stemDerivational(char[] text, int length) {
    final int initialLength = length;
    if (canStrip()) {
      length = removeFirstOrderPrefix(text, length);
    }

    if (length == initialLength) {
      // No first-order prefix was removed: try a second-order prefix, then a suffix.
      if (canStrip()) {
        length = removeSecondOrderPrefix(text, length);
      }
      if (canStrip()) {
        length = removeSuffix(text, length);
      }
      return length;
    }

    // A first-order prefix was removed: a second-order prefix is only attempted when a suffix
    // has been removed as well.
    final int lengthAfterPrefix = length;
    if (canStrip()) {
      length = removeSuffix(text, length);
    }
    if (length != lengthAfterPrefix && canStrip()) {
      length = removeSecondOrderPrefix(text, length);
    }
    return length;
  }

  /** Returns true while the running syllable count still permits another removal. */
  private boolean canStrip() {
    return numSyllables > SYLLABLE_FLOOR;
  }

  /** Counts the vowels among the first {@code length} chars of {@code text}. */
  private static int countVowels(char[] text, int length) {
    int vowels = 0;
    for (int i = 0; i < length; i++) {
      if (isVowel(text[i])) {
        vowels++;
      }
    }
    return vowels;
  }

  /** Returns true for the lowercase letters a, e, i, o and u only. */
  private static boolean isVowel(char ch) {
    return ch == 'a' || ch == 'e' || ch == 'i' || ch == 'o' || ch == 'u';
  }

  /** Returns true when the term is longer than {@code index} and has a vowel at that index. */
  private static boolean vowelAt(char[] text, int length, int index) {
    return length > index && isVowel(text[index]);
  }

  /** Returns true when none of the bits in {@code mask} has been recorded in {@link #flags}. */
  private boolean noneRemoved(int mask) {
    return (flags & mask) == 0;
  }

  /**
   * Records {@code flag}, counts one syllable fewer and deletes {@code count} chars from the
   * front of the term via {@code deleteN}, whose result is returned as the new length.
   */
  private int stripPrefix(char[] text, int length, int count, int flag) {
    flags |= flag;
    numSyllables--;
    return deleteN(text, 0, length, count);
  }

  /** Counts one syllable fewer and returns {@code length} shortened by {@code count} chars. */
  private int dropSuffix(int length, int count) {
    numSyllables--;
    return length - count;
  }

  /** Removes a trailing particle: "kah", "lah" or "pun". */
  private int removeParticle(char[] text, int length) {
    final boolean hasParticle =
        endsWith(text, length, "kah")
            || endsWith(text, length, "lah")
            || endsWith(text, length, "pun");
    return hasParticle ? dropSuffix(length, 3) : length;
  }

  /** Removes a trailing possessive pronoun: "ku", "mu" or "nya". */
  private int removePossessivePronoun(char[] text, int length) {
    if (endsWith(text, length, "ku") || endsWith(text, length, "mu")) {
      return dropSuffix(length, 2);
    }
    if (endsWith(text, length, "nya")) {
      return dropSuffix(length, 3);
    }
    return length;
  }

  /**
   * Removes at most one first-order prefix (the meng-, peng-, di-, ter- and ke- families) and
   * records which family it was. Longer spellings are tested before their shorter fallbacks, so
   * the order of the checks is significant.
   */
  private int removeFirstOrderPrefix(char[] text, int length) {
    // --- meng- family ---
    if (startsWith(text, length, "meng")) {
      return stripPrefix(text, length, 4, REMOVED_MENG);
    }
    if (startsWith(text, length, "meny") && vowelAt(text, length, 4)) {
      // "meny" + vowel: the 'y' becomes the root's initial 's', so only "men" is deleted.
      text[3] = 's';
      return stripPrefix(text, length, 3, REMOVED_MENG);
    }
    if (startsWith(text, length, "men") || startsWith(text, length, "mem")) {
      return stripPrefix(text, length, 3, REMOVED_MENG);
    }
    if (startsWith(text, length, "me")) {
      return stripPrefix(text, length, 2, REMOVED_MENG);
    }

    // --- peng- family ---
    if (startsWith(text, length, "peng")) {
      return stripPrefix(text, length, 4, REMOVED_PENG);
    }
    if (startsWith(text, length, "peny")) {
      if (vowelAt(text, length, 4)) {
        // "peny" + vowel: the 'y' becomes the root's initial 's', so only "pen" is deleted.
        text[3] = 's';
        return stripPrefix(text, length, 3, REMOVED_PENG);
      }
      return stripPrefix(text, length, 4, REMOVED_PENG);
    }
    if (startsWith(text, length, "pen")) {
      if (vowelAt(text, length, 3)) {
        // "pen" + vowel: the 'n' becomes the root's initial 't', so only "pe" is deleted.
        text[2] = 't';
        return stripPrefix(text, length, 2, REMOVED_PENG);
      }
      return stripPrefix(text, length, 3, REMOVED_PENG);
    }
    if (startsWith(text, length, "pem")) {
      return stripPrefix(text, length, 3, REMOVED_PENG);
    }

    // --- remaining first-order prefixes ---
    if (startsWith(text, length, "di")) {
      return stripPrefix(text, length, 2, REMOVED_DI);
    }
    if (startsWith(text, length, "ter")) {
      return stripPrefix(text, length, 3, REMOVED_TER);
    }
    if (startsWith(text, length, "ke")) {
      return stripPrefix(text, length, 2, REMOVED_KE);
    }

    return length;
  }

  /**
   * Removes at most one second-order prefix (the ber- and per- families). The ber- variants
   * record {@code REMOVED_BER}, plain "pe" records {@code REMOVED_PE}, and "per"/"pelajar"
   * record nothing.
   */
  private int removeSecondOrderPrefix(char[] text, int length) {
    if (startsWith(text, length, "ber")) {
      return stripPrefix(text, length, 3, REMOVED_BER);
    }
    // The exact word "belajar" loses its first three chars.
    if (length == 7 && startsWith(text, length, "belajar")) {
      return stripPrefix(text, length, 3, REMOVED_BER);
    }
    // "be" + consonant + "er": only "be" is deleted.
    if (startsWith(text, length, "be")
        && length > 4
        && !isVowel(text[2])
        && text[3] == 'e'
        && text[4] == 'r') {
      return stripPrefix(text, length, 2, REMOVED_BER);
    }

    if (startsWith(text, length, "per")) {
      return stripPrefix(text, length, 3, NO_FLAG);
    }
    // The exact word "pelajar" loses its first three chars.
    if (length == 7 && startsWith(text, length, "pelajar")) {
      return stripPrefix(text, length, 3, NO_FLAG);
    }
    if (startsWith(text, length, "pe")) {
      return stripPrefix(text, length, 2, REMOVED_PE);
    }

    return length;
  }

  /**
   * Removes at most one derivational suffix ("kan", "an" or "i"). Each suffix is skipped when
   * one of the prefix families listed in its mask has already been removed from this word; "i"
   * is additionally skipped when the term ends in "si".
   */
  private int removeSuffix(char[] text, int length) {
    if (endsWith(text, length, "kan") && noneRemoved(REMOVED_KE | REMOVED_PENG | REMOVED_PE)) {
      return dropSuffix(length, 3);
    }

    if (endsWith(text, length, "an") && noneRemoved(REMOVED_DI | REMOVED_MENG | REMOVED_TER)) {
      return dropSuffix(length, 2);
    }

    if (endsWith(text, length, "i")
        && !endsWith(text, length, "si")
        && noneRemoved(REMOVED_BER | REMOVED_KE | REMOVED_PENG)) {
      return dropSuffix(length, 1);
    }

    return length;
  }
}
